1 /** 2 * Hash Map 3 * Copyright: © 2015 Economic Modeling Specialists, Intl. 4 * Authors: Brian Schott 5 * License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0) 6 */ 7 8 module containers.hashmap; 9 10 private import core.lifetime : move; 11 private import containers.internal.hash; 12 private import containers.internal.node : shouldAddGCRange; 13 private import std.experimental.allocator.mallocator : Mallocator; 14 private import std.traits : isBasicType, Unqual; 15 16 /** 17 * Associative array / hash map. 18 * Params: 19 * K = the key type 20 * V = the value type 21 * Allocator = the allocator type to use. Defaults to `Mallocator` 22 * hashFunction = the hash function to use on the keys 23 * supportGC = true if the container should support holding references to 24 * GC-allocated memory. 25 */ 26 struct HashMap(K, V, Allocator = Mallocator, alias hashFunction = generateHash!K, 27 bool supportGC = shouldAddGCRange!K || shouldAddGCRange!V, 28 bool storeHash = true) 29 { 30 this(this) @disable; 31 32 private import std.experimental.allocator.common : stateSize; 33 34 static if (stateSize!Allocator != 0) 35 { 36 this() @disable; 37 38 /** 39 * Use the given `allocator` for allocations. 40 */ 41 this(Allocator allocator) pure nothrow @nogc @safe 42 in 43 { 44 assert(allocator !is null, "Allocator must not be null"); 45 } 46 do 47 { 48 this.allocator = allocator; 49 } 50 51 /** 52 * Constructs an HashMap with an initial bucket count of bucketCount. bucketCount 53 * must be a power of two. 54 */ 55 this(size_t bucketCount, Allocator allocator) 56 in 57 { 58 assert(allocator !is null, "Allocator must not be null"); 59 assert((bucketCount & (bucketCount - 1)) == 0, "bucketCount must be a power of two"); 60 } 61 do 62 { 63 this.allocator = allocator; 64 initialize(bucketCount); 65 } 66 67 invariant 68 { 69 assert(allocator !is null, "Allocator must not be null"); 70 } 71 } 72 else 73 { 74 /** 75 * Constructs an HashMap with an initial bucket count of bucketCount. bucketCount 76 * must be a power of two. 77 */ 78 this(size_t bucketCount) 79 in 80 { 81 assert((bucketCount & (bucketCount - 1)) == 0, "bucketCount must be a power of two"); 82 } 83 do 84 { 85 initialize(bucketCount); 86 } 87 } 88 89 ~this() nothrow 90 { 91 scope (failure) assert(false, "HashMap destructor threw an exception"); 92 clear(); 93 } 94 95 /** 96 * Removes all items from the map 97 */ 98 void clear() 99 { 100 import std.experimental.allocator : dispose; 101 102 // always remove ranges from GC first before disposing of buckets, to 103 // prevent segfaults when the GC collects at an unfortunate time 104 static if (useGC) 105 GC.removeRange(buckets.ptr); 106 allocator.dispose(buckets); 107 108 buckets = null; 109 _length = 0; 110 } 111 112 /** 113 * Supports `aa[key]` syntax. 114 */ 115 ref opIndex(this This)(K key) 116 { 117 import std.conv : text; 118 import std.exception : enforce; 119 120 alias CET = ContainerElementType!(This, V, true); 121 size_t i; 122 auto n = find(key, i); 123 enforce(n !is null, "'" ~ text(key) ~ "' not found in HashMap"); 124 return *cast(CET*) &n.value; 125 } 126 127 /** 128 * Returns: `true` if there is an entry in this map for the given `key`, 129 * false otherwise. 130 */ 131 bool containsKey(this This)(K key) inout 132 { 133 size_t i; 134 return find(key, i) !is null; 135 } 136 137 /** 138 * Gets the value for the given key, or returns `defaultValue` if the given 139 * key is not present. 140 * 141 * Params: 142 * key = the key to look up 143 * value = the default value 144 * Returns: the value indexed by `key`, if present, or `defaultValue` otherwise. 145 */ 146 auto get(this This)(K key, lazy V defaultValue) 147 { 148 alias CET = ContainerElementType!(This, V); 149 150 size_t i; 151 auto n = find(key, i); 152 if (n is null) 153 return defaultValue; 154 return cast(CET) n.value; 155 } 156 157 /** 158 * If the given key does not exist in the HashMap, adds it with 159 * the value `defaultValue`. 160 * 161 * Params: 162 * key = the key to look up 163 * value = the default value 164 * Returns: a pointer to the value stored in the HashMap with the given key. 165 * The pointer is guaranteed to be valid only until the next HashMap 166 * modification. 167 */ 168 auto getOrAdd(this This)(K key, lazy V defaultValue) 169 { 170 alias CET = ContainerElementType!(This, V); 171 172 size_t i; 173 auto n = find(key, i); 174 if (n is null) 175 return cast(CET*) &insert(key, defaultValue).value; 176 else 177 return cast(CET*) &n.value; 178 } 179 180 /** 181 * Supports $(B aa[key] = value;) syntax. 182 */ 183 void opIndexAssign(V value, const K key) 184 { 185 insert(key, move(mutable(value))); 186 } 187 188 /** 189 * Supports $(B key in aa) syntax. 190 * 191 * Returns: pointer to the value corresponding to the given key, 192 * or null if the key is not present in the HashMap. 193 */ 194 inout(V)* opBinaryRight(string op)(const K key) inout nothrow @trusted if (op == "in") 195 { 196 size_t i; 197 auto n = find(key, i); 198 if (n is null) 199 return null; 200 return &(cast(inout) n).value; 201 } 202 203 /** 204 * Removes the value associated with the given key 205 * Returns: true if a value was actually removed. 206 */ 207 bool remove(K key) 208 { 209 size_t i; 210 auto n = find(key, i); 211 if (n is null) 212 return false; 213 static if (storeHash) 214 auto node = Node(n.hash, n.key); 215 else 216 auto node = Node(n.key); 217 immutable bool removed = buckets[i].remove(node); 218 if (removed) 219 _length--; 220 return removed; 221 } 222 223 /** 224 * Returns: the number of key/value pairs in this container. 225 */ 226 size_t length() const nothrow pure @property @safe @nogc 227 { 228 return _length; 229 } 230 231 /** 232 * Returns: `true` if there are no items in this container. 233 */ 234 bool empty() const nothrow pure @property @safe @nogc 235 { 236 return _length == 0; 237 } 238 239 /** 240 * Returns: a range of the keys in this map. 241 */ 242 auto byKey(this This)() inout @trusted 243 { 244 return MapRange!(This, IterType.key)(cast(Unqual!(This)*) &this); 245 } 246 247 /** 248 * Returns: a GC-allocated array filled with the keys contained in this map. 249 */ 250 K[] keys() const @property 251 out(result) 252 { 253 assert (result.length == _length, "Length mismatch"); 254 } 255 do 256 { 257 import std.array : appender; 258 auto app = appender!(K[])(); 259 foreach (ref const bucket; buckets) 260 { 261 foreach (ref item; bucket) 262 app.put(cast(K) item.key); 263 } 264 return app.data; 265 } 266 267 268 /** 269 * Returns: a range of the values in this map. 270 */ 271 auto byValue(this This)() inout @trusted 272 { 273 return MapRange!(This, IterType.value)(cast(Unqual!(This)*) &this); 274 } 275 276 /// ditto 277 alias opSlice = byValue; 278 279 /** 280 * Returns: a GC-allocated array containing the values contained in this map. 281 */ 282 auto values(this This)() const @property 283 out(result) 284 { 285 assert (result.length == _length, "Length mismatch"); 286 } 287 do 288 { 289 import std.array : appender; 290 auto app = appender!(ContainerElementType!(This, V)[])(); 291 foreach (ref const bucket; buckets) 292 { 293 foreach (item; bucket) 294 app.put(cast(ContainerElementType!(This, V)) item.value); 295 } 296 return app.data; 297 } 298 299 /** 300 * Returns: a range of the kev/value pairs in this map. The element type of 301 * this range is a struct with `key` and `value` fields. 302 */ 303 auto byKeyValue(this This)() inout @trusted 304 { 305 return MapRange!(This, IterType.both)(cast(Unqual!(This)*) &this); 306 } 307 308 /** 309 * Support for $(D foreach(key, value; aa) { ... }) syntax; 310 */ 311 int opApply(int delegate(const ref K, ref V) del) 312 { 313 int result = 0; 314 foreach (ref bucket; buckets) 315 foreach (ref node; bucket[]) 316 if ((result = del(*cast(K*)&node.key, *cast(V*)&node.value)) != 0) 317 return result; 318 return result; 319 } 320 321 /// ditto 322 int opApply(int delegate(const ref K, const ref V) del) const 323 { 324 int result = 0; 325 foreach (const ref bucket; buckets) 326 foreach (const ref node; bucket[]) 327 if ((result = del(*cast(K*)&node.key, *cast(V*)&node.value)) != 0) 328 return result; 329 return result; 330 } 331 332 /// ditto 333 int opApply(int delegate(ref V) del) 334 { 335 int result = 0; 336 foreach (ref bucket; buckets) 337 foreach (ref node; bucket[]) 338 if ((result = del(*cast(V*)&node.value)) != 0) 339 return result; 340 return result; 341 } 342 343 /// ditto 344 int opApply(int delegate(const ref V) del) const 345 { 346 int result = 0; 347 foreach (const ref bucket; buckets) 348 foreach (const ref node; bucket[]) 349 if ((result = del(*cast(V*)&node.value)) != 0) 350 return result; 351 return result; 352 } 353 354 mixin AllocatorState!Allocator; 355 356 private: 357 358 import std.experimental.allocator : make, makeArray; 359 import containers.unrolledlist : UnrolledList; 360 import containers.internal.storage_type : ContainerStorageType; 361 import containers.internal.element_type : ContainerElementType; 362 import containers.internal.mixins : AllocatorState; 363 import core.memory : GC; 364 365 enum bool useGC = supportGC && (shouldAddGCRange!K || shouldAddGCRange!V); 366 alias Hash = typeof({ K k = void; return hashFunction(k); }()); 367 368 static ref ContainerStorageType!T mutable(T)(ref T value) { return *cast(ContainerStorageType!T*)&value; } 369 370 enum IterType: ubyte 371 { 372 key, value, both 373 } 374 375 static struct MapRange(MapType, IterType Type) 376 { 377 static if (Type == IterType.both) 378 { 379 struct FrontType 380 { 381 ContainerElementType!(MapType, K) key; 382 ContainerElementType!(MapType, V) value; 383 } 384 } 385 else static if (Type == IterType.value) 386 alias FrontType = ContainerElementType!(MapType, V); 387 else static if (Type == IterType.key) 388 alias FrontType = ContainerElementType!(MapType, K); 389 else 390 static assert(false); 391 392 FrontType front() 393 { 394 static if (Type == IterType.both) 395 return FrontType(cast(ContainerElementType!(MapType, K)) bucketRange.front.key, 396 cast(ContainerElementType!(MapType, V)) bucketRange.front.value); 397 else static if (Type == IterType.value) 398 return cast(ContainerElementType!(MapType, V)) bucketRange.front.value; 399 else static if (Type == IterType.key) 400 return cast(ContainerElementType!(MapType, K)) bucketRange.front.key; 401 else 402 static assert(false); 403 } 404 405 bool empty() const pure nothrow @nogc @property 406 { 407 return _empty; 408 } 409 410 void popFront() pure nothrow @nogc 411 { 412 bucketRange.popFront(); 413 if (bucketRange.empty) 414 { 415 while (bucketRange.empty) 416 { 417 bucketIndex++; 418 if (bucketIndex >= hm.buckets.length) 419 { 420 _empty = true; 421 break; 422 } 423 else 424 bucketRange = hm.buckets[bucketIndex][]; 425 } 426 } 427 } 428 429 private: 430 431 this(Unqual!(MapType)* hm) 432 { 433 this.hm = hm; 434 this.bucketIndex = 0; 435 bucketRange = typeof(bucketRange).init; 436 this._empty = false; 437 438 while (true) 439 { 440 if (bucketIndex >= hm.buckets.length) 441 { 442 _empty = true; 443 break; 444 } 445 bucketRange = hm.buckets[bucketIndex][]; 446 if (bucketRange.empty) 447 bucketIndex++; 448 else 449 break; 450 } 451 } 452 453 Unqual!(MapType)* hm; 454 size_t bucketIndex; 455 typeof(hm.buckets[0].opSlice()) bucketRange; 456 bool _empty; 457 } 458 459 void initialize(size_t bucketCount = DEFAULT_BUCKET_COUNT) 460 { 461 import std.conv : emplace; 462 assert((bucketCount & (bucketCount - 1)) == 0, "bucketCount must be a power of two"); 463 464 buckets = makeArray!Bucket(allocator, bucketCount); 465 static if (useGC) 466 GC.addRange(buckets.ptr, buckets.length * Bucket.sizeof); 467 foreach (ref bucket; buckets) 468 { 469 static if (stateSize!Allocator == 0) 470 emplace(&bucket); 471 else 472 emplace(&bucket, allocator); 473 } 474 } 475 476 Node* insert(const K key, V value) 477 { 478 return insert(key, move(mutable(value)), hashFunction(key)); 479 } 480 481 Node* insert(const K key, V value, const Hash hash, const bool modifyLength = true) 482 { 483 if (buckets.length == 0) 484 initialize(); 485 immutable size_t index = hashToIndex(hash, buckets.length); 486 foreach (ref item; buckets[index]) 487 { 488 if (item.hash == hash && item.key == key) 489 { 490 item.value = move(mutable(value)); 491 return &item; 492 } 493 } 494 static if (storeHash) 495 Node node = Node(hash, cast(ContainerStorageType!K) key, move(mutable(value))); 496 else 497 Node node = Node(cast(ContainerStorageType!K) key, move(mutable(value))); 498 Node* n = buckets[index].insertAnywhere(move(node)); 499 if (modifyLength) 500 _length++; 501 if (shouldRehash()) 502 { 503 rehash(); 504 immutable newIndex = hashToIndex(hash, buckets.length); 505 foreach (ref item; buckets[newIndex]) 506 { 507 if (item.hash == hash && item.key == key) 508 return &item; 509 } 510 assert(false, "Inserted item not found after rehash"); 511 } 512 else 513 return n; 514 } 515 516 /** 517 * Returns: true if the load factor has been exceeded 518 */ 519 bool shouldRehash() const pure nothrow @safe @nogc 520 { 521 // We let this be greater than one because each bucket is an unrolled 522 // list that has more than one element per linked list node. 523 return (float(_length) / float(buckets.length)) > 1.33f; 524 } 525 526 /** 527 * Rehash the map. 528 */ 529 void rehash() @trusted 530 { 531 import std.conv : emplace; 532 immutable size_t newLength = buckets.length << 1; 533 immutable size_t newSize = newLength * Bucket.sizeof; 534 Bucket[] oldBuckets = buckets; 535 buckets = cast(Bucket[]) allocator.allocate(newSize); 536 if (newLength) 537 assert (buckets, "Bucket reallocation failed"); 538 static if (useGC) 539 GC.addRange(buckets.ptr, buckets.length * Bucket.sizeof); 540 assert (buckets.length == newLength, "Bucket reallocation length mismatch"); 541 foreach (ref bucket; buckets) 542 { 543 static if (stateSize!Allocator == 0) 544 emplace(&bucket); 545 else 546 emplace(&bucket, allocator); 547 } 548 549 foreach (ref bucket; oldBuckets) 550 { 551 foreach (ref node; bucket) 552 insert(cast(K) node.key, move(node.value), node.hash, false); 553 typeid(typeof(bucket)).destroy(&bucket); 554 } 555 static if (useGC) 556 GC.removeRange(oldBuckets.ptr); 557 allocator.deallocate(cast(void[]) oldBuckets); 558 } 559 560 inout(Node)* find(const K key, ref size_t index) inout 561 { 562 return find(key, index, hashFunction(key)); 563 } 564 565 inout(Node)* find(const K key, ref size_t index, const Hash hash) inout 566 { 567 import std.array : empty; 568 569 if (buckets.empty) 570 return null; 571 index = hashToIndex(hash, buckets.length); 572 foreach (ref r; buckets[index]) 573 { 574 if (r.hash == hash && r.key == key) 575 return cast(inout(Node)*) &r; 576 } 577 return null; 578 } 579 580 struct Node 581 { 582 bool opEquals(ref const K key) const 583 { 584 return key == this.key; 585 } 586 587 bool opEquals(ref const Node n) const 588 { 589 return this.hash == n.hash && this.key == n.key; 590 } 591 592 static if (storeHash) 593 Hash hash; 594 else 595 @property Hash hash() const { return hashFunction(key); } 596 597 ContainerStorageType!K key; 598 ContainerStorageType!V value; 599 } 600 601 alias Bucket = UnrolledList!(Node, Allocator, useGC); 602 Bucket[] buckets; 603 size_t _length; 604 } 605 606 /// 607 unittest 608 { 609 import std.uuid : randomUUID; 610 import std.range.primitives : walkLength; 611 612 auto hm = HashMap!(string, int)(16); 613 assert (hm.length == 0); 614 assert (!hm.remove("abc")); 615 hm["answer"] = 42; 616 assert (hm.length == 1); 617 assert ("answer" in hm); 618 assert (hm.containsKey("answer")); 619 hm.remove("answer"); 620 assert (hm.length == 0); 621 assert ("answer" !in hm); 622 assert (hm.get("answer", 1000) == 1000); 623 assert (!hm.containsKey("answer")); 624 hm["one"] = 1; 625 hm["one"] = 1; 626 assert (hm.length == 1); 627 assert (hm["one"] == 1); 628 hm["one"] = 2; 629 assert(hm["one"] == 2); 630 foreach (i; 0 .. 1000) 631 { 632 hm[randomUUID().toString] = i; 633 } 634 assert (hm.length == 1001); 635 assert (hm.keys().length == hm.length); 636 assert (hm.values().length == hm.length); 637 () @nogc { 638 assert (hm.byKey().walkLength == hm.length); 639 assert (hm.byValue().walkLength == hm.length); 640 assert (hm[].walkLength == hm.length); 641 assert (hm.byKeyValue().walkLength == hm.length); 642 }(); 643 foreach (v; hm) {} 644 645 auto hm2 = HashMap!(char, char)(4); 646 hm2['a'] = 'a'; 647 648 HashMap!(int, int) hm3; 649 assert (hm3.get(100, 20) == 20); 650 hm3[100] = 1; 651 assert (hm3.get(100, 20) == 1); 652 auto pValue = 100 in hm3; 653 assert(*pValue == 1); 654 } 655 656 version(emsi_containers_unittest) unittest 657 { 658 static class Foo 659 { 660 string name; 661 } 662 663 void someFunc(const scope ref HashMap!(string,Foo) map) @safe 664 { 665 foreach (kv; map.byKeyValue()) 666 { 667 assert (kv.key == "foo"); 668 assert (kv.value.name == "Foo"); 669 } 670 } 671 672 auto hm = HashMap!(string, Foo)(16); 673 auto f = new Foo; 674 f.name = "Foo"; 675 hm.insert("foo", f); 676 assert("foo" in hm); 677 } 678 679 // Issue #54 680 version(emsi_containers_unittest) unittest 681 { 682 HashMap!(string, int) map; 683 map.insert("foo", 0); 684 map.insert("bar", 0); 685 686 foreach (key; map.keys()) 687 map[key] = 1; 688 foreach (key; map.byKey()) 689 map[key] = 1; 690 691 foreach (value; map.byValue()) 692 assert(value == 1); 693 foreach (value; map.values()) 694 assert(value == 1); 695 } 696 697 version(emsi_containers_unittest) unittest 698 { 699 HashMap!(int, int, Mallocator, (int i) => i) map; 700 auto p = map.getOrAdd(1, 1); 701 assert(*p == 1); 702 *p = 2; 703 assert(map[1] == 2); 704 } 705 706 debug (EMSI_CONTAINERS) version(emsi_containers_unittest) unittest 707 { 708 import std.uuid : randomUUID; 709 import std.algorithm.iteration : walkLength; 710 import std.stdio; 711 712 auto hm = HashMap!(string, int)(16); 713 foreach (i; 0 .. 1_000_000) 714 { 715 auto str = randomUUID().toString; 716 //writeln("Inserting ", str); 717 hm[str] = i; 718 //if (i > 0 && i % 100 == 0) 719 //writeln(i); 720 } 721 writeln(hm.buckets.length); 722 723 import std.algorithm.sorting:sort; 724 ulong[ulong] counts; 725 foreach (i, ref bucket; hm.buckets[]) 726 counts[bucket.length]++; 727 foreach (k; counts.keys.sort()) 728 writeln(k, "=>", counts[k]); 729 } 730 731 // #74 732 version(emsi_containers_unittest) unittest 733 { 734 HashMap!(string, size_t) aa; 735 aa["b"] = 0; 736 ++aa["b"]; 737 assert(aa["b"] == 1); 738 } 739 740 // storeHash == false 741 version(emsi_containers_unittest) unittest 742 { 743 static struct S { size_t v; } 744 HashMap!(S, S, Mallocator, (S s) { return s.v; }, false, false) aa; 745 static assert(aa.Node.sizeof == 2 * S.sizeof); 746 } 747 748 version(emsi_containers_unittest) unittest 749 { 750 auto hm = HashMap!(string, int)(16); 751 752 foreach (v; hm) {} 753 foreach (ref v; hm) {} 754 foreach (int v; hm) {} 755 foreach (ref int v; hm) {} 756 foreach (const ref int v; hm) {} 757 758 foreach (k, v; hm) {} 759 foreach (k, ref v; hm) {} 760 foreach (k, int v; hm) {} 761 foreach (k, ref int v; hm) {} 762 foreach (k, const ref int v; hm) {} 763 764 foreach (ref k, v; hm) {} 765 foreach (ref k, ref v; hm) {} 766 foreach (ref k, int v; hm) {} 767 foreach (ref k, ref int v; hm) {} 768 foreach (ref k, const ref int v; hm) {} 769 770 foreach (const string k, v; hm) {} 771 foreach (const string k, ref v; hm) {} 772 foreach (const string k, int v; hm) {} 773 foreach (const string k, ref int v; hm) {} 774 foreach (const string k, const ref int v; hm) {} 775 776 foreach (const ref string k, v; hm) {} 777 foreach (const ref string k, ref v; hm) {} 778 foreach (const ref string k, int v; hm) {} 779 foreach (const ref string k, ref int v; hm) {} 780 foreach (const ref string k, const ref int v; hm) {} 781 782 hm["a"] = 1; 783 foreach (k, ref v; hm) { v++; } 784 assert(hm["a"] == 2); 785 } 786 787 version(emsi_containers_unittest) unittest 788 { 789 static struct S { @disable this(this); } 790 alias HM = HashMap!(int, S); 791 } 792 793 version(emsi_containers_unittest) unittest 794 { 795 struct S { int* a; } 796 alias HM = HashMap!(S, int); 797 }